The purpose of this workflow is to start exploring the LPS sensitivity data and to run cape on it to see if there any informative interactions.

rm(list = ls())

library(here)

all.fun <- list.files(here("Code"), full.names = TRUE, pattern = ".R")
for(i in 1:length(all.fun)){source(all.fun[i])}

all.packages <- c("pheatmap", "qtl2", "cluster")
load_libraries(all.packages)

load_latest_cape(here("../../../git_repositories/cape"))

We need to take the non-numeric phenotypes out of the phenotype file and put them into covariates. I also added ‘NA’ to the na.strings argument in the yaml file.

#if this is the first time through, create a new data directory

mod.data.dir <- here("Data", "qtl2_files_mod")

if(!file.exists(mod.data.dir)){
    #duplicate all files
    file.copy(here("Data", "qtl2_files"), mod.data.dir, recursive = TRUE)

    orig.pheno <- read.csv(here("Data", "qtl2_files", "lps_pheno.csv"))
    orig.covar <- read.csv(here("Data", "qtl2_files", "lps_covar.csv"))

    nonnum.pheno.col <- c("strain", "category")
    num.pheno.col <- setdiff(colnames(orig.pheno), nonnum.pheno.col)
    nonnum.pheno <- cbind(orig.covar, orig.pheno[,nonnum.pheno.col])

    num.pheno <- orig.pheno[,num.pheno.col]

    write.table(num.pheno, here("Data", "qtl2_files_mod", "qtl2_files", "lps_pheno.csv"),
        sep = ",", quote = FALSE, row.names = FALSE)
    write.table(nonnum.pheno, here("Data", "qtl2_files_mod", "qtl2_files", "lps_covar.csv"), 
        sep = ",", quote = FALSE, row.names = FALSE)
}
#add 'NA' to the na.strings arguments in the yaml file to 
#suppress the warning about "NAs introduced by coercion"
lps <- read_cross2(here("Data", "qtl2_files_mod", "qtl2_files", "lps.yaml"))
lps_cape <- qtl2_to_cape(lps)
## Converting sex to numeric.
## Converting cross_direction to numeric.
## Converting strain to numeric.
## Converting category to numeric.
## Converting genoprobs to array...
data_obj <- lps_cape$data_obj
geno_obj <- lps_cape$geno_obj

gene.info <- read.delim(here("Data", "general", "mouse_gene_info.txt"))

Body Temperature

Body temperature was measured hourly for seven hours post LPS injection. The following plot shows a heatmap of the temperatures after the injection. There are several patterns. Some temperatures stay fairly steady. Others dip a bit after a few hours, and some really drop in the last couple hours of the experiment. In some animals the temperatures go up initially, and others hold more steady before dropping.

temp.idx <- grep("X", colnames(data_obj$pheno))
temp.label <- gsub("X", "", colnames(data_obj$pheno)[temp.idx])
temp <- data_obj$pheno[,temp.idx]

pheatmap(temp, cluster_cols = FALSE)

k = 7

We can cluster the tamperature profiles hierarchically to see the different patterns. The following heatmap shows the cluster assignments for 7 clusters.

temp_clust <- hclust(dist(temp))
cl <- cutree(temp_clust, k = k)

#reorder cluster based on mean temperature
cl_temp <- lapply(1:k, function(x) rowMeans(temp[which(cl == x),,drop=FALSE]))
#boxplot(cl_temp)
cl_mean <- sapply(cl_temp, mean)
cl_order <- order(cl_mean, decreasing = TRUE)
#boxplot(cl_temp[cl_order])
ordered_cl  <- rep(NA, length(cl))
for(cl_k in 1:k){
    ordered_cl[which(cl == cl_order[cl_k])] <- cl_k
}

cl_annot <- data.frame("cluster" = as.factor(ordered_cl))
rownames(cl_annot) <- rownames(temp)
pheatmap(temp, annotation_row = cl_annot, cluster_cols = FALSE)

If we plot the profiles out for each cluster independently, we can better see the different profiles. We’ve ordered them here by the mean temperature for each cluster for ease of comparison.

layout.mat <- get.layout.mat(k)
layout(layout.mat)
for(cl.k in 1:k){
    plot.new()
    plot.window(xlim = c(1,ncol(temp)), ylim = c(min(temp),  max(temp)))
    cl.idx <- which(ordered_cl == cl.k)
    axis(1);mtext("Hrs", side = 1, line = 2.5)
    axis(2);mtext("Temp (C)", side = 2, line = 2.5)
    mtext(paste("Cluster", cl.k), side = 3)
    for(i in 1:length(cl.idx)){
        points(1:ncol(temp), temp[cl.idx[i],], col = ordered_cl[cl.idx[i]], type = "b", pch = 16)
    }
}

If we plot the first two principal components of the temperature matrix against each other, we see that the clusters separate along the first principal component, which accounts for 84% of the variation in temperature. This suggests that the first prinicipal component might be a good trait for mapping.

temp.decomp <- plot.decomp(temp, plot.results = FALSE)
#temp.decomp <- plot.decomp(temp, cols = data_obj$pheno[,"sex"]+1) #no effect of sex
#I am flipping this so it is positively correlated with temperature
temp.pc <- temp.decomp$u[,1,drop=FALSE]*-1
colnames(temp.pc) <- "Temp_PC1"
rownames(temp.pc) <- rownames(temp)

plot(temp.decomp$u*-1, col = ordered_cl, pch = 16, xlab = "PC1", ylab = "PC2")

#also use the second PC
temp.pc2  <- temp.decomp$u[,2,drop=FALSE]*-1 #also flipping this one for ease of interpretation later
colnames(temp.pc2) <- "Temp_PC2"
rownames(temp.pc2) <- rownames(temp)

data_obj$pheno <- cbind(data_obj$pheno, temp.pc, temp.pc2)
#boxplot(temp.decomp$u[,1]~cl)

The first PC of the temperature matrix is more heavily weighted on later temperature measurements than on earlier temperature measurements.

#quartz(width = 9, height = 5)
par(mfrow = c(2,4))
for(i in 1:ncol(temp)){
    plot.with.model(temp[,i], temp.pc, main = colnames(temp)[i], xlab = colnames(temp)[i],
        ylab = "Temp PC1")
}

The second PC of the temperature matrix is more heavily weighted on the mid point temperatures.

#quartz(width = 9, height = 5)
par(mfrow = c(2,4))
for(i in 1:ncol(temp)){
    plot.with.model(temp[,i], temp.pc2, main = colnames(temp)[i], xlab = colnames(temp)[i],
        ylab = "Temp PC2")
}

The first temperature PC is highly correlated with the minimum temperature each mouse achieved, whereas the second temperature PC is highly correlated with the maximum temperature each mouse achieved. Maybe we should use both PCs with cape.

temp.max <- apply(temp, 1, max)
temp.min <- apply(temp, 1, min)

par(mfrow = c(1,2))
plot.with.model(temp.min, temp.pc, main = "Temp PC1 vs. Temperature Miminum")
plot.with.model(temp.max, temp.pc2, main = "Temp PC2 vs. Temperature Maximum")

If we look just at the values of PC1 of the temperature matrix, we see that some animals have negative values. The mice with the most negative values in PC1 are in cluster 1 above. These are the most reistant to LPS. Their temperature barely changes at all. The mice with the most positive values in PC1 are in clusters 6 and 7. These mice had the most dramatic drops in temperature and are the most sensitive to LPS. Thus, the value of this PC describes how sensitive or resistant the mice are to LPS as determined by their temperature profile.

#quartz(width = 9, height = 5.5)
pc.order <- order(temp.pc)
plot(temp.pc[pc.order], type = "h", col = ordered_cl[pc.order], lwd = 3,
    main = "PC1 by cluster", ylab = "Temp PC1")
abline(h = 0)
legend("topleft", legend = paste("Cluster", 1:k), lty = 1, lwd = 3, col = 1:k)
arrows(x0 = 50, x1 = 20, y0 = -0.25, lwd = 3)
text(x = 50, y = -0.265, labels = "More LPS sensitive", adj = 1)
arrows(x0 = 60, x1 = 90, y0 = -0.25, lwd = 3)
text(x = 60, y = -0.265, labels = "More LPS resistant", adj = 0)

Trait Correlations

The animals in the upper ranges of the first PC of the temperature matrix, also tend to have higher measurements on in the immune matrix.

immune <- data_obj$pheno[,c("Il.1b", "Il.6", "TNF", "IFN.b")]
norm.immune <- apply(immune, 2, rankZ)

par(mfrow = c(2,2))
for(i in 1:ncol(norm.immune)){
    plot.with.model(temp.pc, immune[,i], xlab = "Temperature PC 1",
        ylab = colnames(norm.immune)[i],
        main = colnames(norm.immune)[i], report = "cor.test",
        col = ordered_cl)
}

These correlations extend the range of the values. If we rank Z normalize the values, the correlations are even stronger. There is a floor effect in each of the immune molecules, which is most pronounce in IFN beta. There are animals with a range of temperature profiles that did not have measurable IFN beta.

par(mfrow = c(2,2))
for(i in 1:ncol(norm.immune)){
    plot.with.model(temp.pc, norm.immune[,i], xlab = "Temperature PC 1",
        ylab = paste("Normalized", colnames(norm.immune)[i]),
        main = paste("Normalized", colnames(norm.immune)[i]), report = "cor.test",
        col = ordered_cl)
}

The second temperature PC2 is negatively correlated with the cytokines.

par(mfrow = c(2,2))
for(i in 1:ncol(norm.immune)){
    plot.with.model(temp.pc2, norm.immune[,i], xlab = "Temperature PC 2",
        ylab = colnames(norm.immune)[i],
        main = colnames(norm.immune)[i], report = "cor.test",
        col = ordered_cl)
}

Individual Trait Mapping

As an initial test, we mapped each of the normalized traits.

The first PC of the temperature matrix has a suggestive QTL on proximal Chr 17. Maybe the MHC locus? IL6 and TNF also map to Chr 17, although the QTLs look more distal. Neither IL1 beta nor IFN beta map anywhere.

genoprobs <- calc_genoprob(lps)
full.pheno <- cbind(temp.pc, temp.pc2, norm.immune)
temp.scan <- scan1(genoprobs, full.pheno)
temp.perm <- scan1perm(genoprobs, full.pheno, n_perm = 100)
alpha <- c(0.05, 0.01, 0.1)
sig.level <- summary(temp.perm, alpha = alpha)

for(ph in 1:ncol(full.pheno)){
    cat("###", colnames(full.pheno)[ph], "\n")
   plot(temp.scan, lodcol = ph, map = lps$pmap, ylim = c(0, max(c(max(temp.scan), max(sig.level)*1.05))),
        main = colnames(full.pheno)[ph])
    plot.dim <- par("usr")
    plot.width <- plot.dim[2] - plot.dim[1]
    text.x <- plot.dim[2]-(plot.width*0.007) 
    line.stop <- plot.dim[2]-(plot.width*0.04) 
    segments(x0 = 0, x1 = line.stop, y0 = sig.level[,ph], lty = 2)
    text(x = text.x, y = sig.level[,ph], labels = alpha, adj = 1)
    cat("\n\n")
}

Temp_PC1

Temp_PC2

Il.1b

Il.6

TNF

IFN.b

CAPE

Now we run cape with the combined temperature PC1 and immune molecule matrix. Please see cape_results.html for results from cape.

results_dir <- here("Results", "cape_2ET")
final_obj <- run_cape(data_obj, geno_obj, 
    param_file = file.path(results_dir, "0_lps.parameters_0.yml"),
    results_path = results_dir)
#run cape with a coding for heterosis.
#AA and BB are 0 and A/B is 1
code_heterosis <- function(marker_geno){
    new.geno <- matrix(0, nrow = nrow(marker_geno), ncol = ncol(marker_geno))
    dimnames(new.geno) <- dimnames(marker_geno)
    which.hom <- union(which(marker_geno[,1] > 0.9), which(marker_geno[,2] > 0.9))
    new.geno[which.hom,1] <- 1
    which.het <- intersect(which(marker_geno[,1] < 0.6), which(marker_geno[,2] < 0.6))
    new.geno[which.het,2] <- 1
    return(new.geno)
}

heterosis_markers <- lapply(1:dim(geno_obj)[3], function(x) code_heterosis(geno_obj[,,x]))
heterosis_geno <- abind(heterosis_markers, along = 3)
dimnames(heterosis_geno) <- dimnames(geno_obj)

results_dir <- here("Results", "cape_heterosis")
final_obj <- run_cape(data_obj, heterosis_geno, 
    param_file = file.path(results_dir, "0_lps.parameters_0.yml"),
    results_path = results_dir)
#run cape with a coding for heterosis only on chromosome 17.
#AA and BB are 0 and A/B is 1
code_heterosis <- function(marker_geno, recode = TRUE){
    if(!recode){
        return(marker_geno)
    }
    new.geno <- matrix(0, nrow = nrow(marker_geno), ncol = ncol(marker_geno))
    dimnames(new.geno) <- dimnames(marker_geno)
    which.hom <- union(which(marker_geno[,1] > 0.9), which(marker_geno[,2] > 0.9))
    new.geno[which.hom,1] <- 1
    which.het <- intersect(which(marker_geno[,1] < 0.6), which(marker_geno[,2] < 0.6))
    new.geno[which.het,2] <- 1
    return(new.geno)
}

do.recode <- rep(FALSE, dim(geno_obj)[[3]])
chr17.idx <- which(data_obj$chromosome == 17)
do.recode[chr17.idx] <- TRUE

heterosis_markers <- lapply(1:dim(geno_obj)[3], function(x) code_heterosis(geno_obj[,,x], recode = do.recode[x]))
heterosis_geno <- abind(heterosis_markers, along = 3)
dimnames(heterosis_geno) <- dimnames(geno_obj)

results_dir <- here("Results", "cape_heterosis17")
final_obj <- run_cape(data_obj, heterosis_geno, 
    param_file = file.path(results_dir, "0_lps.parameters_0.yml"),
    results_path = results_dir)

Hardy-Weinberg and LD

It looks as if there might be some deviations from Hardy-Weinberg equilibrium, particularly in the Fshr locus on Chr 17. There is also the possibility of long-range LD with this locus suggesting lethal combinations of alleles. This section explores that possibility.

chr = 17; pos = 89292380 #Fshr position

fshr_loc <- find_marker(lps$pmap, chr = chr, pos = pos)

fshr_int <- pull_genoprobint(genoprobs, lps$pmap, chr = chr, int = c(pos-50e6, pos+50e6))
fshr_geno <- apply(fshr_int[[1]], 3, function(x) apply(x, 1, function(y) y*c(0, 0.5, 1)))
pheatmap(fshr_geno, cluster_cols = FALSE)

pdf("~/Desktop/Fshr_loc.pdf", width = 15, height = 7)
pheatmap(cor(fshr_geno), cluster_rows = FALSE, cluster_cols = FALSE)
dev.off()


pheatmap(fshr_int[[1]][,1,], cluster_cols = FALSE)
pheatmap(fshr_int[[1]][,2,], cluster_cols = FALSE)
pheatmap(fshr_int[[1]][,3,], cluster_cols = FALSE)
#test whether a given marker is in HW equilibrium
#based on the ideal frequencies for an F2
find_hw <- function(marker_geno){
    #marker_prob <- apply(marker_geno, 1, function(x) mean(x*c(0, 0.5, 1)))
    #hist(marker_prob)
    #obs_pq <- colMeans(marker_geno)
    #barplot(obs_pq)
    #p_freq <- mean(apply(marker_geno, 1, function(x) sum(x[1] + x[2]/2)))
    #q_freq <- mean(apply(marker_geno, 1, function(x) sum(x[3] + x[2]/2)))
    p_freq = 0.5 #set all to 0.5 because this is an F2
    q_freq <- 1-p_freq
    hw_exp <- c(p_freq^2, 2*p_freq*q_freq, q_freq^2)*nrow(marker_geno)
    obs <- colSums(marker_geno)
    #barplot(hw_pq)
    #hw.diff <- hw_pq - obs_pq
    #barplot(rbind(hw_exp, obs), beside = TRUE)
    #barplot(hw.diff)
    #barplot(rbind(hw_pq, obs_pq, hw.diff), beside = TRUE)
    chisq <- sum(sapply(1:length(hw_exp), function(x) (obs[x] - hw_exp[x])^2/hw_exp[x]))
    #return(chisq.p)
    return(chisq)
}

plot_hw <- function(marker_geno, marker_name, main = ""){
    p_freq = 0.5 #set all to 0.5 because this is an F2
    q_freq <- 1-p_freq
    hw_exp <- c(p_freq^2, 2*p_freq*q_freq, q_freq^2)*nrow(marker_geno)
    obs <- colSums(marker_geno)
    chisq <- sum(sapply(1:length(hw_exp), function(x) (obs[x] - hw_exp[x])^2/hw_exp[x]))
    chisq.p <- 1-pchisq(chisq, 1)
    maxy <- max(c(hw_exp, obs))
    freq.table <- rbind(hw_exp, obs)
    rownames(freq.table) <- c("Expected", "Observed")
    a <- barplot(rbind(hw_exp, obs), beside = TRUE, main = paste(main, "\np =", signif(chisq.p, 2)), 
        col = c("#7fc97f", "#beaed4"), ylab = "Count", ylim = c(0, maxy*1.1))
    text(a[1,], y = hw_exp+(maxy*0.025), labels = signif(hw_exp, 2))
    text(a[2,], y = obs+(maxy*0.025), labels = signif(obs, 2))
    legend("topleft", fill =  c("#7fc97f", "#beaed4"), legend = c("Expected", "Observed"))
    result <- list("Frequencies" = freq.table, "p" = chisq.p, "adj.p" = adj.p)
    invisible(result)
}

#find LD between two markers
#The expected paired genotype frequencies are based
#on the ideal for an F2
find_D <- function(marker1_geno, marker2_geno){
    #marker_prob <- apply(marker_geno, 1, function(x) mean(x*c(0, 0.5, 1)))
    #hist(marker_prob)
    #obs_pq <- colMeans(marker_geno)
    #barplot(obs_pq)
    #a_freq <- matrix(colMeans(marker1_geno), ncol = 1)
    #b_freq <- matrix(colMeans(marker2_geno), nrow = 1)
    a_freq <- matrix(c(0.25, 0.5, 0.25), ncol = 1)
    b_freq <- matrix(c(0.25, 0.5, 0.25), nrow = 1)
    exp_freq <- a_freq%*%b_freq*nrow(marker1_geno)
    obs_freq <- t(marker1_geno)%*%marker2_geno
    #exp_freq - obs_freq
    test <- fisher.test(round(obs_freq))
    return(test$p.value)
}

#plot expected and observed genotype frequencie for two markers
#The expected is for the ideal frequencies from an F2.
plot_LD_freq <- function(marker1, marker2, main = ""){
    
    obs_freq <- t(marker1)%*%marker2

    a_freq <- matrix(c(0.25, 0.5, 0.25), ncol = 1)
    b_freq <- matrix(c(0.25, 0.5, 0.25), nrow = 1)
    exp_freq <- a_freq%*%b_freq*nrow(marker1)
    geno_names <- apply(cbind(rep(c("BB", "BM", "MM"), each = 3), rep(c("BB", "BM", "MM"), 3)), 1, function(x) paste(x, collapse = "_"))

    D.p <- find_D(marker1, marker2)

    barplot(rbind(as.vector(exp_freq), as.vector(obs_freq)), beside = TRUE, ylim = c(0, 35),
        names = geno_names, col = c("#7fc97f", "#beaed4"), main = paste(main, "\np =", signif(D.p, 2)))
    legend("topright", fill =  c("#7fc97f", "#beaed4"), legend = c("Expected", "Observed"))
}

gene_hw <- function(gene.name){
    gene.idx <- which(gene.info[,"external_gene_name"] == gene.name)
    if(length(gene.idx) == 0){
        return(paste(gene.name, "not found"))
    }
    gene.chr <- gene.info[gene.idx,"chromosome_name"]
    gene.pos <- gene.info[gene.idx,"start_position"]

    gene_loc <- find_marker(lps$pmap, chr = gene.chr, pos = gene.pos)
    gene_geno <- pull_genoprobpos(genoprobs, lps$pmap, chr = gene.chr, pos = gene.pos)
    gene_prob <- apply(gene_geno, 1, function(x) sum(x*c(0, 0.5, 1)))

    
    hw.stats <- plot_hw(gene_geno, gene_loc, main = paste(gene.name, "locus"))
    hw.stats.f <- plot_hw(gene_geno[f.idx,], gene_loc, main = paste(gene.name, "locus; Females"))
    hw.stats.m <- plot_hw(gene_geno[m.idx,], gene_loc, main = paste(gene.name, "locus; Males"))

    result <- list("gene.name" = gene.name, "chromosome" = gene.chr, "Position" = gene.pos,
        "nearest_marker" = gene_loc, "genotype" = gene_geno, "genotype_calls" = gene_prob,
        "Frequencies" = hw.stats$Frequencies, "p" = hw.stats$p, adj.p = hw.stats$adj.p)
    invisible(result)
}


adj_p <- function(hw_p, marker_name){
    adj.p <- p.adjust(unlist(hw_p[1:19]), "fdr")
    marker.adj.p <- adj.p[marker_name]
    return(marker.adj.p)
}

We then tested all markers for Hardy-Weinberg equilibrium compared to ideal F2 frequencies. It looks as if there might be some markers that are out of equilibrium, and it looks as if this effect is present particularly in the females.

#test all markers for HW equilibrium
hw_by_chr <- hw_p <- vector(mode = "list", length = length(genoprobs))
names(hw_by_chr) <- names(genoprobs)
for(ch in 1:(length(genoprobs)-1)){
    hw_by_chr[[ch]] <- apply(genoprobs[[ch]], 3, function(x) find_hw(x[]))
    hw_p[[ch]] <- sapply(hw_by_chr[[ch]], function(x) 1-pchisq(x, df = 1))
    #pheatmap(t(hw_by_chr[[ch]]), cluster_cols = FALSE, cluster_rows = FALSE)
}

#also test HW for males and females separately. It seems that for 
#some markers there might be a sex effect.
f.idx <- which(data_obj$pheno[,"sex"] == 0)
m.idx <- which(data_obj$pheno[,"sex"] == 1)

hw_by_chr <- hw_p <- hw_by_chr_f <- hw_f_p <- hw_by_chr_m <- hw_m_p <- vector(mode = "list", length = length(genoprobs))
names(hw_by_chr) <- names(hw_by_chr_f) <- names(hw_by_chr_m) <- names(genoprobs)
for(ch in 1:(length(genoprobs)-1)){

    hw_by_chr[[ch]] <- apply(genoprobs[[ch]], 3, find_hw)
    hw_p[[ch]] <- sapply(hw_by_chr[[ch]], function(x) 1-pchisq(x, df = 1))

    hw_by_chr_f[[ch]] <- apply(genoprobs[[ch]], 3, function(x) find_hw(x[f.idx,]))
    hw_f_p[[ch]] <- sapply(hw_by_chr_f[[ch]], function(x) 1-pchisq(x, df = 1))

    hw_by_chr_m[[ch]] <- apply(genoprobs[[ch]], 3, function(x) find_hw(x[m.idx,]))
    hw_m_p[[ch]] <- sapply(hw_by_chr_m[[ch]], function(x) 1-pchisq(x, df = 1))
}


par(mfrow = c(1,3))
#hist(unlist(hw_p), main = "HW p value distribution across all markers", xlab = "p value")
qqunif.plot(unlist(hw_p), plot.label = "QQ plot of HW p values")
#hist(-log10(unlist(hw_p)), main = "Distribution of -log10(p values)",
#    xlab = "-log10(p value)")
qqunif.plot(unlist(hw_f_p), plot.label = "QQ plot of HW p values in Females")
qqunif.plot(unlist(hw_m_p), plot.label = "QQ plot of HW p values in Males")

The bar plot below shows the number of significant loci overall, and in males and females.

pval.thresh <- 0.05
adj.p <- p.adjust(unlist(hw_p), "fdr")
adj.p.f <- p.adjust(unlist(hw_f_p), "fdr")
adj.p.m <- p.adjust(unlist(hw_m_p), "fdr")
sig.idx <- which(adj.p <= pval.thresh)
sig.idx.f <- which(adj.p.f <= pval.thresh)
sig.idx.m <- which(adj.p.m <= pval.thresh)

barplot_with_num(c("Overall" = length(sig.idx), 
    "Females" = length(sig.idx.f), "Males" = length(sig.idx.m)))

Non-equilibrium by location

The plots below show where in the genome these significantly non-equilibrium markers are.

The spike on chr 4 for the females looks suspicious to me. Could this be the result of genotyping errors? The spike is totally absent in the males, though, so maybe there is something biological going on.

plot_sig_hw <- function(hw_p_list, main = "", ymax = 5.5){
    chr.mat <- matrix(1:21, nrow = 1)
    layout(chr.mat)
    for(ch in 1:19){
        if(ch == 1){
            par(mar = c(4,2,4,0))
            plot.new()
            plot.window(xlim = c(1, length(hw_p_list[[ch]])), ylim = c(0, ymax))
            axis(2, line = -2.5)
            mtext("-log10(p value)", side = 2, line = 0.5)
        }else{
            par(mar = c(4,0,4,0))
        }
        plot.new()
        plot.window(xlim = c(1, length(hw_p_list[[ch]])), ylim = c(0, ymax))
        if(ch %% 2 == 0){
            draw.rectangle(0, length(hw_p_list[[ch]]), 0, ymax, fill = "gray", border = NA)
        }
        points(-log10(hw_p_list[[ch]]), type = "l")
        mtext(paste("Chr", ch), side = 1, line = 1, cex = 0.7)
    }
    mtext(main, side = 3, line = -2.5, outer = TRUE)
}

Overall HW

plot_sig_hw(hw_p, "Overall HW", ymax = 6)

Female HW

plot_sig_hw(hw_f_p, "Female HW", ymax = 6)

Male HW

plot_sig_hw(hw_m_p, "Male HW", ymax = 6)

Frequencies for most significant

Below we plot frequencies for the markers with the smallest p values. In each case there seems to be a depletion of B6/B6 genotypes. These markers are on Chr 2 between 172 and 174 Mb, and Chr 4 between 53 and 58 Mb.

sig.overall <- lapply(hw_p, function(x) which(x <= 10e-5))
sig.f <- lapply(hw_f_p, function(x) which(x <= 10e-5))

all.sig <- lapply(1:length(sig.overall), function(x) union(sig.overall[[x]], sig.f[[x]]))

for(ch in 1:length(all.sig)){
    if(length(all.sig[[ch]]) > 0){
        for(j in 1:length(all.sig[[ch]])){
            par(mfrow = c(1,3))
            marker.name = names(lps$pmap[[ch]])[all.sig[[ch]][j]]
            marker.pos <- lps$pmap[[ch]][marker.name]
            cat("#### Chr", ch, signif(marker.pos/1e6, 3), "Mb\n")
            plot_hw(marker_geno = genoprobs[[ch]][,,marker.name], marker_name = marker.name,
                main = paste0("Overall Chr", ch, ", ", signif(marker.pos/1e6, 3), "Mb"))
            plot_hw(marker_geno = genoprobs[[ch]][f.idx,,marker.name], marker_name = marker.name,
                main = paste0("Female Chr", ch, ", ", signif(marker.pos/1e6, 3), "Mb"))
            plot_hw(marker_geno = genoprobs[[ch]][m.idx,,marker.name], marker_name = marker.name,
                main = paste0("Male Chr", ch, ", ", signif(marker.pos/1e6, 3), "Mb"))
            cat("\n\n")
        }
    }
}

Chr 2 172 Mb

Chr 2 173 Mb

Chr 2 174 Mb

Chr 4 53.7 Mb

Chr 4 56.2 Mb

Chr 4 56.8 Mb

Chr 4 57.2 Mb

Chr 4 152 Mb

Chr 4 54.2 Mb

Chr 11 58.7 Mb

Chr 11 59.4 Mb

Chr 11 60.5 Mb

Chr 11 60.9 Mb

Chr 11 61.3 Mb

Chr 11 61.7 Mb

Chr 11 62.1 Mb

Chr 11 64.6 Mb

Chr 11 65.5 Mb

Chr 11 66.3 Mb

Chr 11 67.1 Mb

Fshr Locus

The following bar plot shows that the Fshr locus is not out of Hardy-Weinberg equilibrium overall or for either of the sexes.

par(mfrow = c(1,3))
fshr_result <- gene_hw("Fshr")

The following box plots show that we do see the sex-specific effect of the Fshr locus. There is heterosis in the females and no effect in the males.

par(mfrow = c(1,2))
boxplot(data_obj$pheno[f.idx,"Temp_PC1"]*-1~as.factor(round(fshr_result$genotype_calls[f.idx]*2)/2), 
    ylab = "Normalized Temp (AU)", xlab = "Genotype",  names = c("BB", "BM", "MM"),
    main = "Females")
boxplot(data_obj$pheno[m.idx,"Temp_PC1"]*1~as.factor(round(fshr_result$genotype_calls[m.idx]*2)/2),
    ylab = "Normalized Temp (AU)", xlab = "Genotype",  names = c("BB", "BM", "MM"),
    main = "Males")
mtext("Effect of Fshr locus on response to LPS", outer = TRUE, line = -1.5)

We then looked for long range LD with the Fshr locus that might indicate combinations of alleles that might be lethal. The qq plot for the p values is below. The p values are deflated, suggesting there is no long-range LD.

fshr_geno <- fshr_result$genotype
LD.p  <- vector(mode = "list", length = length(genoprobs)-1)
for(ch in 1:(length(genoprobs)-1)){
   LD.p[[ch]] <- sapply(1:dim(genoprobs[[ch]])[3], function(x) find_D(fshr_geno, genoprobs[[ch]][,,x]))
}

#show p value distribution for markers not on the same chromosome as Fshr
not.fshr <- setdiff(c(1:19), 17)
qqunif.plot(unlist(LD.p[not.fshr]))

The plots below show pairwise genotypes for markers with the smallest p values. For the markers on Chr 19, there appears to be a slight enrichment for double heterozygotes and double MM homozygotes. The p values aren’t great, though, so I’m not sure how far we’d want to take this.

pthresh <- 10^-2
long.range <- lapply(LD.p[not.fshr], function(x) which(x < pthresh))

for(ch in 1:length(not.fshr)){
    if(length(long.range[[ch]]) > 0){
        for(j in 1:length(long.range[[ch]])){
            marker.pos <- lps$pmap[[not.fshr[ch]]][j]
            plot_LD_freq(fshr_geno, genoprobs[[not.fshr[ch]]][,,long.range[[ch]][j]],
                main = paste0("Fshr locus and Chr", not.fshr[ch], ", ", 
                signif(marker.pos/1e6, 3), "Mb"))
        }
    }
}

ind_genotype <- function(marker.geno){
    ind.geno <- apply(marker.geno, 1, function(x) sum(x*c(0, 0.5, 1)))
    return(ind.geno)
}

plot_chr_cor <- function(chr1, chr2, genoprobs){

    chr1.idx <- which(names(genoprobs) == chr1)
    chr2.idx <- which(names(genoprobs) == chr2)

    chr1.genotypes <- apply(genoprobs[[chr1.idx]], 3, ind_genotype)
    chr2.genotypes <- apply(genoprobs[[chr2.idx]], 3, ind_genotype)

    chr2.names <- rep("", ncol(chr1.genotypes))
    chr2.names[median(1:ncol(chr1.genotypes))]  <- paste("Chr", chr1)

    chr4.names <- rep("", ncol(chr2.genotypes))
    chr4.names[median(1:ncol(chr2.genotypes))]  <- paste("Chr", chr2)

    colnames(chr1.genotypes) <- chr2.names
    colnames(chr2.genotypes) <- chr4.names
    geno.cor <- cor(cbind(chr1.genotypes, chr2.genotypes))
    imageWithText(abs(geno.cor), use.pheatmap.colors = TRUE, show.text = FALSE, col.text.rotation = 0,
        col.text.shift = 0.03, row.text.shift = 0.03, global.color.scale = TRUE, global.min = 0,
        global.max = 1)
    max.x <- ncol(geno.cor)
    draw.rectangle(max.x - ncol(chr2.genotypes), max.x, 1, ncol(chr2.genotypes), lwd = 3)
    draw.rectangle(1, ncol(chr1.genotypes), max.x - ncol(chr1.genotypes), max.x, lwd = 3)

    add_color_bar(abs(geno.cor), "topright", use.pheatmap.colors = TRUE, x.min.shift = 0,
        x.max.shift = -0.03, y.min.shift = 0.03, y.max.shift = 0.05, num.scale.pts = 5,
        label.buffer = 0.015, global.color.scale = TRUE, global.min = 0,
        global.max = 1)

    invisible(geno.cor)
}

chr.pairs <- pair.matrix(names(genoprobs))

pdf("~/Desktop/chr_ld.pdf")
for(i in 1:nrow(chr.pairs)){
    plot_chr_cor(chr.pairs[i,1],chr.pairs[i,2], genoprobs)
}
dev.off()

Zbp1 Locus

The following bar plot shows that the Zfbp1 locus does have some deviation from HW equilibrium, particularly in the females.

par(mfrow = c(1,3))
Zbp1_result <- gene_hw("Zbp1")